import pandas as pd
# Load the Cars93 dataset. The path is a hard-coded absolute location, so it
# must be adjusted when the notebook runs on another machine.
A = pd.read_csv("/users/ranjeetgaikwad/desktop/data science class/Cars93.csv")
# Preview the first rows to check the columns were parsed as expected.
A.head()
| Unnamed: 0 | Manufacturer | Model | Type | Min.Price | Price | Max.Price | MPG.city | MPG.highway | AirBags | ... | Passengers | Length | Wheelbase | Width | Turn.circle | Rear.seat.room | Luggage.room | Weight | Origin | Make | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Acura | Integra | Small | 12.9 | 15.9 | 18.8 | 25 | 31 | None | ... | 5 | 177 | 102 | 68 | 37 | 26.5 | 11.0 | 2705 | non-USA | Acura Integra |
| 1 | 2 | Acura | Legend | Midsize | 29.2 | 33.9 | 38.7 | 18 | 25 | Driver & Passenger | ... | 5 | 195 | 115 | 71 | 38 | 30.0 | 15.0 | 3560 | non-USA | Acura Legend |
| 2 | 3 | Audi | 90 | Compact | 25.9 | 29.1 | 32.3 | 20 | 26 | Driver only | ... | 5 | 180 | 102 | 67 | 37 | 28.0 | 14.0 | 3375 | non-USA | Audi 90 |
| 3 | 4 | Audi | 100 | Midsize | 30.8 | 37.7 | 44.6 | 19 | 26 | NaN | ... | 6 | 193 | 106 | 70 | 37 | 31.0 | 17.0 | 3405 | non-USA | Audi 100 |
| 4 | 5 | BMW | 535i | Midsize | 23.7 | 30.0 | 36.2 | 22 | 30 | Driver only | ... | 4 | 186 | 109 | 69 | 39 | 27.0 | 13.0 | 3640 | non-USA | BMW 535i |
5 rows × 28 columns
# Summarise column dtypes and non-null counts to spot columns with missing data.
A.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 93 entries, 0 to 92 Data columns (total 28 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Unnamed: 0 93 non-null int64 1 Manufacturer 93 non-null object 2 Model 93 non-null object 3 Type 93 non-null object 4 Min.Price 93 non-null float64 5 Price 93 non-null float64 6 Max.Price 93 non-null float64 7 MPG.city 93 non-null int64 8 MPG.highway 93 non-null int64 9 AirBags 89 non-null object 10 DriveTrain 93 non-null object 11 Cylinders 93 non-null object 12 EngineSize 93 non-null float64 13 Horsepower 93 non-null int64 14 RPM 93 non-null int64 15 Rev.per.mile 93 non-null int64 16 Man.trans.avail 93 non-null object 17 Fuel.tank.capacity 93 non-null float64 18 Passengers 93 non-null int64 19 Length 93 non-null int64 20 Wheelbase 93 non-null int64 21 Width 93 non-null int64 22 Turn.circle 93 non-null int64 23 Rear.seat.room 91 non-null float64 24 Luggage.room 82 non-null float64 25 Weight 93 non-null int64 26 Origin 93 non-null object 27 Make 93 non-null object dtypes: float64(7), int64(12), object(9) memory usage: 20.5+ KB
#Univariate Analysis
# Partition the column names by dtype: object columns are treated as
# categorical, every other column as continuous.
cat = [name for name in A.columns if A[name].dtypes == "object"]
con = [name for name in A.columns if A[name].dtypes != "object"]
import matplotlib.pyplot as plt
import seaborn as sb
# Plot every column against Price: a boxplot for categorical (object) columns
# and a scatter plot for continuous ones.
for i in A.columns:
    if A[i].dtypes == "object":
        # Pass x/y by keyword: seaborn 0.12+ only accepts `data` positionally.
        sb.boxplot(x=A[i], y=A.Price)
    else:
        plt.scatter(A[i], A.Price)
    # In both plot kinds the column is on the x-axis and Price on the y-axis,
    # so the axis labels are the same for either branch.
    plt.xlabel(i)
    plt.ylabel('Price')
    plt.show()
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
# Display the names of the columns classified as categorical.
cat
['Manufacturer', 'Model', 'Type', 'AirBags', 'DriveTrain', 'Cylinders', 'Man.trans.avail', 'Origin', 'Make']
# Display the names of the columns classified as continuous.
con
['Unnamed: 0', 'Min.Price', 'Price', 'Max.Price', 'MPG.city', 'MPG.highway', 'EngineSize', 'Horsepower', 'RPM', 'Rev.per.mile', 'Fuel.tank.capacity', 'Passengers', 'Length', 'Wheelbase', 'Width', 'Turn.circle', 'Rear.seat.room', 'Luggage.room', 'Weight']
import seaborn as sb
import matplotlib.pyplot as plt
# Draw the distribution (histogram plus KDE curve) of every continuous column,
# one figure per column.
# `histplot` replaces the deprecated `distplot`; stat="density" keeps the
# density-scaled y-axis that `distplot` produced.
for i in con:
    sb.histplot(x=A[i], kde=True, stat="density")
    plt.show()
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
# Show the distributions of a few selected continuous columns side by side
# in the first five cells of a 3x3 grid.
plt.figure(figsize=(40, 19))
selected_columns = [
    'MPG.highway',
    'EngineSize',
    'Horsepower',
    'Fuel.tank.capacity',
    'Weight',
]
for position, column in enumerate(selected_columns, start=1):
    plt.subplot(3, 3, position)
    # Pass a Series (not a one-column DataFrame) so every panel gets its
    # column name as x-label; `histplot` replaces the deprecated `distplot`.
    sb.histplot(x=A[column], kde=True, stat="density")
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning) /opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning) /opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning) /opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning) /opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
<AxesSubplot:xlabel='Weight', ylabel='Density'>
# Histogram of the Price column.
A['Price'].hist()
<AxesSubplot:>
# Frequency of each AirBags category.
# Pass the vector as `x=`: seaborn 0.12+ only accepts `data` positionally.
sb.countplot(x=A.AirBags)
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
<AxesSubplot:xlabel='AirBags', ylabel='count'>
# Frequency of each Cylinders category.
# Pass the vector as `x=`: seaborn 0.12+ only accepts `data` positionally.
sb.countplot(x=A.Cylinders)
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
<AxesSubplot:xlabel='Cylinders', ylabel='count'>
# Number of cars per Type, most frequent first.
A["Type"].value_counts()
Midsize 22 Small 21 Compact 16 Sporty 14 Large 11 Van 9 Name: Type, dtype: int64
# Same Type counts as a horizontal bar chart.
A["Type"].value_counts().plot(kind = "barh")
<AxesSubplot:>
# Same Type counts as a vertical bar chart.
A["Type"].value_counts().plot(kind = "bar")
<AxesSubplot:>
# Same Type counts as a pie chart (share of each Type).
A["Type"].value_counts().plot(kind = "pie")
<AxesSubplot:ylabel='Type'>
# Bivariate
#con vs con
# Continuous vs continuous: Price on the x-axis against EngineSize on the y-axis.
plt.scatter(A.Price, A.EngineSize, c="black")
# Fixed tick positions: every 5 units on x, every 1 unit on y.
plt.xticks(range(0, 80, 5))
plt.yticks(range(0, 6, 1))
plt.xlabel("Price")
plt.ylabel("EngineSize")
plt.title("Price vs EngineSize Scatterplot")
Text(0.5, 1.0, 'Price avs Engine Scatterplot')
# Continuous vs continuous: Price (x-axis) against Horsepower (y-axis).
plt.scatter(A.Price, A.Horsepower, c = "red")
<matplotlib.collections.PathCollection at 0x7fd07e19d580>
# cat vs con
# Categorical vs continuous: Price distribution for each Cylinders category.
# Pass x/y by keyword: seaborn 0.12+ only accepts `data` positionally.
sb.boxplot(x=A.Cylinders, y=A.Price)
/opt/anaconda3/envs/deep_learning/lib/python3.8/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
<AxesSubplot:xlabel='Cylinders', ylabel='Price'>
#cat vs cat
# Categorical vs categorical: contingency table with one row per Model value
# and one column per Type value.
pd.crosstab(A.Model, A.Type)
| Type | Compact | Large | Midsize | Small | Sporty | Van |
|---|---|---|---|---|---|---|
| Model | ||||||
| 100 | 0 | 0 | 1 | 0 | 0 | 0 |
| 190E | 1 | 0 | 0 | 0 | 0 | 0 |
| 240 | 1 | 0 | 0 | 0 | 0 | 0 |
| 300E | 0 | 0 | 1 | 0 | 0 | 0 |
| 323 | 0 | 0 | 0 | 1 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... |
| Taurus | 0 | 0 | 1 | 0 | 0 | 0 |
| Tempo | 1 | 0 | 0 | 0 | 0 | 0 |
| Tercel | 0 | 0 | 0 | 1 | 0 | 0 |
| Town_Car | 0 | 1 | 0 | 0 | 0 | 0 |
| Vision | 0 | 1 | 0 | 0 | 0 | 0 |
93 rows × 6 columns
#Multivariate Analysis
# Pairwise relationships across the DataFrame, drawn before the missing values
# are filled.
sb.pairplot(A)
<seaborn.axisgrid.PairGrid at 0x7fd07e659d30>
# Count the missing values in each column.
A.isna().sum()
Unnamed: 0 0 Manufacturer 0 Model 0 Type 0 Min.Price 0 Price 0 Max.Price 0 MPG.city 0 MPG.highway 0 AirBags 4 DriveTrain 0 Cylinders 0 EngineSize 0 Horsepower 0 RPM 0 Rev.per.mile 0 Man.trans.avail 0 Fuel.tank.capacity 0 Passengers 0 Length 0 Wheelbase 0 Width 0 Turn.circle 0 Rear.seat.room 2 Luggage.room 11 Weight 0 Origin 0 Make 0 dtype: int64
# Tabulate the per-column missing-value counts, then show only the columns
# that actually have missing entries.
B = A.isna().sum().to_frame("missing_values")
B[B["missing_values"] > 0]
| missing_values | |
|---|---|
| AirBags | 4 |
| Rear.seat.room | 2 |
| Luggage.room | 11 |
# Function to fill in the missing values
def replacer(df):
    """Fill the missing values of ``df`` in place.

    Only columns that contain at least one missing value are touched.
    Numeric columns are filled with the column mean; every other column
    (object, string, category, ...) is filled with its most frequent value.
    A non-numeric column with no observed value at all is left unchanged,
    because it has no most-frequent value to fill with.

    Args:
        df: pandas DataFrame to modify in place.

    Returns:
        None. The columns of ``df`` are reassigned with the filled values.
    """
    missing_counts = df.isna().sum()
    for column in missing_counts[missing_counts > 0].index:
        if pd.api.types.is_numeric_dtype(df[column]):
            fill_value = df[column].mean()
        else:
            # value_counts() is sorted by frequency, so the first index entry
            # is the most frequent observed value.
            counts = df[column].value_counts()
            # Empty counts (or only zero-count categories) mean that nothing
            # was observed in this column, so there is nothing to impute with.
            if counts.empty or counts.iloc[0] == 0:
                continue
            fill_value = counts.index[0]
        df[column] = df[column].fillna(fill_value)
# Fill the missing values of A in place.
replacer(A)
# Recount the missing values per column to verify that none remain.
B = pd.DataFrame(A.isna().sum(), columns = ["check"])
B
| check | |
|---|---|
| Unnamed: 0 | 0 |
| Manufacturer | 0 |
| Model | 0 |
| Type | 0 |
| Min.Price | 0 |
| Price | 0 |
| Max.Price | 0 |
| MPG.city | 0 |
| MPG.highway | 0 |
| AirBags | 0 |
| DriveTrain | 0 |
| Cylinders | 0 |
| EngineSize | 0 |
| Horsepower | 0 |
| RPM | 0 |
| Rev.per.mile | 0 |
| Man.trans.avail | 0 |
| Fuel.tank.capacity | 0 |
| Passengers | 0 |
| Length | 0 |
| Wheelbase | 0 |
| Width | 0 |
| Turn.circle | 0 |
| Rear.seat.room | 0 |
| Luggage.room | 0 |
| Weight | 0 |
| Origin | 0 |
| Make | 0 |
# Pairwise relationships again, now that the missing values have been filled.
sb.pairplot(A)
<seaborn.axisgrid.PairGrid at 0x7fd06c85e9d0>
# Final check of the missing-value count in each column.
A.isna().sum()
Unnamed: 0 0 Manufacturer 0 Model 0 Type 0 Min.Price 0 Price 0 Max.Price 0 MPG.city 0 MPG.highway 0 AirBags 0 DriveTrain 0 Cylinders 0 EngineSize 0 Horsepower 0 RPM 0 Rev.per.mile 0 Man.trans.avail 0 Fuel.tank.capacity 0 Passengers 0 Length 0 Wheelbase 0 Width 0 Turn.circle 0 Rear.seat.room 0 Luggage.room 0 Weight 0 Origin 0 Make 0 dtype: int64